# Look at discrete cases

################################################
## Important Note: Students are not expected  ##
## to be able to come up with the ideas and   ##
## programming examples demonstrated in this  ##
## script.                                    ##
################################################
#
# Here we will look at a situation where there
# are a relatively small number of possible
# outcomes and the probability of each outcome
# is known. Then we will ask questions about
# more complex probabilities.
#
# First we will set up the problem. Let us
# say that there are 7 possible outcomes,
# each of which is a numeric value:
#      5, 8, 11, 12, 19, 23, and 28.
# Then we need to know the probability of
# getting each outcome in a random event.
#
# We could express those probabilities as
# decimals or as fractions. [Remember that we
# can always change from one form to the
# other.] For us it will be easier to give
# the probabilities as fractions.
#   P(X=5)=1/6     P(X=8)=1/12    P(X=11)=1/9
#   P(X=12)=7/36   P(X=19)=1/4    P(X=23)=1/18
#   P(X=28)=5/36
# In order to deal with this it is
# easier if those fractions have a common
# denominator.
#   P(X=5)=6/36    P(X=8)=3/36    P(X=11)=4/36
#   P(X=12)=7/36   P(X=19)=9/36   P(X=23)=2/36
#   P(X=28)=5/36
# And, of course, the sum of all those
# probabilities is 36/36=1
#
# We can start by creating an "urn" that has
# our values in the proportion given by their
# respective probabilities.
our_values <- c(5, 8, 11, 12, 19, 23, 28)
our_values
#
# Then we want to specify the desired
# proportion, which is just the numerator
# of those probabilities when expressed with
# a common denominator.
#
our_proport <- c(6, 3, 4, 7, 9, 2, 5)
our_proport
#
# Then we can fill our urn with the values we
# want in the proportion we need.
# rep() repeats each value the matching number
# of times, so the urn holds 36 items in all.
our_urn <- rep(our_values, our_proport)
our_urn

# We could demonstrate the model by taking a
# sample of size 1 over and over
sample(our_urn, 1)   # do this line many times

# We could take 36000 samples by getting 36000
# random values in the range from 1 to 36 and
# then taking the indicated value from our urn.
# runif() is an R function to generate random values;
# however, those values will be real numbers
# between the second and third parameters.
# We want integer values, so we use as.integer()
# to drop off any decimal portion of each
# random value. The upper limit is 37, not 36,
# so that after truncation each of the integers
# 1 through 36 is equally likely.
which_items <- as.integer(runif(36000, 1, 37))
# look at the first 50 of those 36000 index values
head(which_items, 50)
# now get our 36000 samples, each of size 1
big_sample <- our_urn[which_items]
# look at the first 50 of those 36000 samples
head(big_sample, 50)
# look at the distribution of values in our
# big_sample
table(big_sample)
# As you can see, the sample drawn from the urn
# reflects our desired model.
#
# What is the expected value?
mean(our_urn)
# check that out with reference to the
# 36000 items in our big sample
mean(big_sample)
# Why the difference? mean(our_urn) is computed
# from the model itself, whereas big_sample is a
# random sample, so its mean changes from run
# to run.

# I have had some students who worry that our
# urn with only 36 items in it may not be representative
# of our discrete population.
# Therefore, look at a big urn
big_urn <- rep(our_urn, 1000)
table(big_urn)
mean(big_urn)

# What is the standard deviation
# of values in the urn? For this we
# need to find the population standard
# deviation. We can do this two ways.
# sd() divides by n-1; multiplying by
# sqrt((n-1)/n), here with n = 36, converts it
# to the population (divide by n) form.
sd(our_urn) * sqrt(35 / 36)
# or
# pop_sd() normally comes from the course file
# ../pop_sd.R. If that file cannot be found from
# the current working directory, fall back to an
# equivalent local definition so the rest of the
# script can still run.
if (file.exists("../pop_sd.R")) {
  source("../pop_sd.R")
} else {
  message("../pop_sd.R not found; using a local definition of pop_sd()")
  pop_sd <- function(x) {
    sqrt(mean((x - mean(x))^2))
  }
}
pop_sd(our_urn)
# also, note
pop_sd(big_urn)

############################
## Look at a randomly generated problem
#
# choose 8 distinct values from -7 through 19
our_values <- sample(-7:19, 8, replace = FALSE)
our_values
# it is easier to follow these if they
# have been sorted
our_values <- sort(our_values)
our_values
# establish some random proportions
our_proport <- sample(3:22, 8, replace = FALSE)
our_proport
sum(our_proport)
our_urn <- rep(our_values, our_proport)
our_urn
# find the expected value
mean(our_urn)
# label each proportion with the value it belongs to
names(our_proport) <- our_values
our_proport
our_probabilities <- our_proport / sum(our_proport)
our_probabilities
# find the standard deviation of this model
pop_sd(our_urn)